/*
 * DocHandler.java
 *
 * Created on March 29, 2008, 10:34 AM
 *
 * To change this template, choose Tools | Template Manager
 * and open the template in the editor.
 */

package com.afaker.searth.core.index.handler;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.poi.hwpf.extractor.WordExtractor;

/**
 * Document handler for Microsoft Word (.doc) files.
 *
 * Extracts the text of a Word file with POI's {@link WordExtractor} and wraps
 * it, together with some file metadata, in a Lucene {@link Document}.
 *
 * The handler keeps no state between calls to {@link #getDocument(File)}.
 *
 * @author bruce
 */
public class DocHandler extends  DocumentHandler{
    /** Creates a new instance of DocHandler */
    public DocHandler() {
    }

    /**
     * Builds the Lucene document for the given Word file.
     *
     * The returned document carries these fields:
     * <ul>
     *   <li><code>path</code> - absolute path (stored, tokenized)</li>
     *   <li><code>content</code> - absolute path followed by the trimmed
     *       extracted text (reader-based field)</li>
     *   <li><code>format</code> - always <code>"doc"</code> (stored, tokenized)</li>
     *   <li><code>size</code> - file size as text, see
     *       {@link #formatSize(long)} (stored, untokenized)</li>
     *   <li><code>modified</code> - <code>file.lastModified()</code> as a
     *       decimal string (stored, untokenized)</li>
     * </ul>
     *
     * If the file cannot be opened or parsed, the stack trace is printed and
     * the document is still returned, with only the path in its content.
     *
     * @param file the Word file to index
     * @return the populated Lucene document
     */
    public Document getDocument(File file) {
        String text = extractText(file);
        String path = file.getAbsolutePath();

        Document doc = new Document();
        doc.add(new Field("path",path,
                Field.Store.YES,Field.Index.TOKENIZED));
        // NOTE(review): no separator is inserted between the path and the
        // text, so the last path token and the first word of the text are
        // adjacent in the indexed content. Confirm whether this is intended.
        doc.add(new Field("content",new StringReader(path + text.trim())));
        doc.add(new Field("format","doc",Field.Store.YES,Field.Index.TOKENIZED));
        doc.add(new Field("size",formatSize(file.length()),
                Field.Store.YES,Field.Index.UN_TOKENIZED));
        doc.add(new Field("modified",String.valueOf(file.lastModified()),
                Field.Store.YES, Field.Index.UN_TOKENIZED));
        return doc;
    }

    /**
     * Reads the text of a Word file, always closing the underlying stream.
     *
     * @param file the Word file to read
     * @return the extracted text, or an empty string if the file could not be
     *         opened or parsed
     */
    private static String extractText(File file) {
        FileInputStream input = null;
        try {
            input = new FileInputStream(file);
            // The extractor is local on purpose: a failed construction must
            // never fall back to an extractor left over from an earlier file.
            WordExtractor extractor = new WordExtractor(input);
            String text = extractor.getText();
            return text != null ? text : "";
        } catch (IOException ex) {
            // Also covers FileNotFoundException. Best effort: report and
            // index the file with empty text rather than aborting.
            ex.printStackTrace();
            return "";
        } finally {
            if (input != null) {
                try {
                    input.close();
                } catch (IOException ex) {
                    ex.printStackTrace();
                }
            }
        }
    }

    /**
     * Formats a byte count as megabytes (suffix <code>"M"</code>) when it is
     * at least one megabyte, otherwise as kilobytes (suffix <code>"KB"</code>).
     *
     * @param bytes the size in bytes
     * @return the size as text, e.g. <code>"1.5M"</code> or <code>"0.5KB"</code>
     */
    private static String formatSize(long bytes) {
        float megabytes = bytes/1024.0f/1024.0f;
        if(megabytes >= 1) {
            return String.valueOf(megabytes) + "M";
        }
        float kilobytes = bytes/1024.0f;
        return String.valueOf(kilobytes) + "KB";
    }

    /**
     * Manual smoke test: builds the document for the file named by the first
     * argument, or for a default sample path when no argument is given.
     *
     * @param args optional; <code>args[0]</code> is the path of a Word file
     */
    public static void main(String args[]) {
        String path = (args != null && args.length > 0) ? args[0] : "/home/bruce/a.doc";
        DocHandler h = new DocHandler();
        h.getDocument(new File(path));
    }
}
